# author: flavien ganter
# date: december 25, 2019
# r function that calculates amceps, dcsps, and pairwise selection probabilities
# for paired forced-choice conjoint experiments
# v6

amcep <- function(formula,
                  data,         
                  tasks,
                  type = "amcep",
                  by = NULL,
                  adjust = FALSE,
                  id = NULL
) {
  
  
  ## packages
  require(sandwich)
  require(stringr)
  "%nin%" <- Negate("%in%")
  
  
  #####################################
  ########### PRELIMINARIES ###########
  #####################################
  
  
  ## read & check arguments
  
  # outcome
  var.y <- all.vars(stats::update(formula, . ~ 0))
  if (var.y == ".") {
    stop("No outcome variable specified.")
  }
  if (class(data[, match(var.y, names(data))]) %nin% c("numeric", "integer")) {
    stop("The outcome variable should be either numeric or integer.")
  }
  if (all(names(table(data[, match(var.y, names(data))])) != c("0", "1"))) {
    stop("The outcome variable should be a dummy variable with values 0 and 1.")
  }
  outcome <- as.numeric(as.character(data[, match(var.y, names(data))]))
  
  # attributes
  var.x <- all.vars(stats::update(formula, 0 ~ . ))
  if (any(lapply(data[, match(var.x, names(data))], class) %nin% c("factor", "numeric"))) {
    stop("All variables should be either numeric or factor.")
  } 
  
  # auxiliary parameters
  
  # type
  if (type %in% c("amcep", "p") & !is.null(by)) {
    stop("Subgroups can only be specified for the DCSP calculation.")
  }
  if (type == "dcsp" & is.null(by)) {
    stop("No subgroup specified.")
  }
  
  # by
  # if "by" is not specified, replace by a vector of 1
  if (is.null(by)) {
    subgroup <- rep(1, nrow(data))
  } else {
    subgroup <- as.vector(data[, match(by, names(data))])
  }
  
  # id
  # if "id" is not specified, replace by the task id (no clustering)
  if (is.null(id)) {
    clust <- as.vector(data[, match(tasks, names(data))])
  } else {
    clust <- as.vector(data[, match(id, names(data))])
  }
  
  
  ## create base data sets
  
  # base long data set
  data.long <- data.frame(outcome,
                          as.vector(data[, match(tasks, names(data))]),
                          clust, subgroup, data[, match(var.x, names(data))])
  names(data.long) <- c("outcome", "tasks", "clust", "subgroup",
                        names(data)[match(var.x, names(data))])
  if (!is.null(by)) {
    
    # check if subgroup variable is of the right format
    if (class(data.long[["subgroup"]]) != "factor") {
      stop("The subgroup variable is not a factor.")
    }
    if (nlevels(data.long[["subgroup"]]) != 2) {
      stop("Two, and only two, subgroups are allowed to calculate preference difference between subgroups.")
    }
    
    # check if subgroup variable is missing for some observations
    if (sum(is.na(data.long[["subgroup"]])) != 0) {
      warning("The subgroup vector contains missing values. Corresponding observations have been omitted.")
    }
    data.long <- data.long[!is.na(data.long[["subgroup"]]),]
  }
  
  # store labels and replace them by numbers (for convenience)
  var.levels <- vector(mode = "list", length = length(var.x))
  for (i in 1:length(var.x)) {
    var <- var.x[i]
    if (class(data.long[[var]]) == "factor") {
      var.levels[[i]] <- levels(data.long[[var]])
      levels(data.long[[var]]) <- 1:nlevels(data.long[[var]])
    } else {
      var.levels[[i]] <- NA
    }
  }
  subgroup.levels <- levels(data.long$subgroup)
  levels(data.long$subgroup) <- c("1", "2")
  
  # reshape base data set, to wide
  data.long$option <- ave(as.numeric(data.long[["tasks"]]),
                          data.long[["tasks"]], data.long[["clust"]],
                          FUN = seq_along)
  data.wide <- data.long[data.long$option == 1, -ncol(data.long)]
  colnames(data.wide)[c(1, 5:ncol(data.wide))] <-
    paste0(colnames(data.wide)[c(1, 5:ncol(data.wide))], ".1")
  data.wide.compl <- data.long[data.long$option == 2, c(2:(ncol(data.long)-1))]
  colnames(data.wide.compl)[4:ncol(data.wide.compl)] <-
    paste0(colnames(data.wide.compl)[4:ncol(data.wide.compl)], ".2")
  data.wide <- base::merge(data.wide, data.wide.compl, by = c("tasks", "clust", "subgroup"))
  if (sum(complete.cases(data.wide)) < nrow(data.wide)) {
    warning("Some observations have been omitted due to missing values.")
    data.wide <- data.wide[complete.cases(data.wide),]
  }
  
  
  ## detect constraints
  
  # detect constrained variables
  # this section is based on segments of the cregg's package code
  formula_terms <- terms(formula)
  if (any(attr(formula_terms, "order") >= 2)) {
    constrained_terms <- attr(formula_terms, "factors")[, attr(formula_terms, "order") == 2,
                                                        drop = FALSE]
    constrained_vars <- colnames(constrained_terms)
    constraints <- lapply(constrained_vars, function(tmp) {
      as.formula(paste0("~", tmp))
    })
    constrained_vars <- unlist(lapply(constraints, all.vars))
    if (any(duplicated(constrained_vars))) {
      stop("All variables in constraints must be unique and constraints may only be two-way.")
    }
    constrained_vars <- unique(constrained_vars)
    unconstrained_vars <- var.x[!var.x %in% constrained_vars]
  } else {
    constraints <- NULL
    constrained_vars <- NULL
    unconstrained_vars <- var.x
  }
  
  # detect constrained levels
  if (length(constrained_vars)) {
    restrvar.1 <- NULL
    restrvar.2 <- NULL
    constraints.1 <- NULL
    constraints.2 <- NULL
    for (pair in colnames(constrained_terms)) {
      pair.nb <- match(pair, colnames(constrained_terms))
      variable1 <- all.vars(as.formula(paste0("~", pair)))[1]
      variable2 <- all.vars(as.formula(paste0("~", pair)))[2]
      obs.comb <- data.frame(data.long[[variable1]], data.long[[variable2]])
      obs.comb <- obs.comb[!duplicated(obs.comb),]
      cooc.mat <- as.matrix(table(obs.comb[,1], obs.comb[,2]))
      if (length(unique(rowSums(cooc.mat))) != 2 | length(unique(colSums(cooc.mat))) != 2) {
        stop("Constrained variables are not properly defined. There is either no constraints or too
             many constraints.")
      }
      restrvar.1[[pair.nb]] <- variable1
      restrvar.2[[pair.nb]] <- variable2
      constraints.1[[pair.nb]] <- names(rowSums(cooc.mat)[which(rowSums(cooc.mat) == min(rowSums(cooc.mat)))])
      constraints.2[[pair.nb]] <- names(colSums(cooc.mat)[which(colSums(cooc.mat) == min(colSums(cooc.mat)))])
    }
    constraints.levels <- list(restrvar.1, restrvar.2, constraints.1, constraints.2)
  } else {
    constraints.levels <- NULL
  }
  
  
  ## create working data set
  data.prep <- data.frame(outcome = data.wide[["outcome.1"]])
  
  # create contrasting variables
  for (var in var.x) {
    k.cons <- match(var, var.x)
    variable.1 <- paste(var, "1", sep = ".")
    variable.2 <- paste(var, "2", sep = ".")
    
    # continuous variables
    # difference between profile 1 and profile 2
    if (class(data.long[[var]]) == "numeric") {
      var.temp <- data.wide[[variable.1]] - data.wide[[variable.2]]
      data.prep <- cbind(data.prep, var.temp)
      colnames(data.prep)[match("var.temp", names(data.prep))] <- var
      
    # categorical variables (V_ij variables)
    # 1 = profile 1 has the focal level and profile 2 a different level,
    # -1 = profile 2 has the focal level and profile 1 a different level,
    # 0 = both or none have the focal level
    } else {
      comb <- combn(levels(as.factor(data.wide[[paste0(var, ".1")]])), 2)
      for (c in 1:ncol(comb)) {
        var.temp <- ifelse(as.character(data.wide[[variable.1]]) == comb[1,c] &
                             as.character(data.wide[[variable.2]]) == comb[2,c], 1,
                           ifelse(as.character(data.wide[[variable.1]]) == comb[2,c] &
                                    as.character(data.wide[[variable.2]]) == comb[1,c], -1, 0))
        data.prep <- cbind(data.prep, var.temp)
        colnames(data.prep)[match("var.temp", names(data.prep))] <- paste0(var, ".", comb[1,c],
                                                                           "-", comb[2,c])
      }
    }
  }
  
  # add subgroup and cluster id
  data.prep <- cbind(data.prep, data.wide[, c("subgroup", "clust")])
  
  
  
  #############################
  ########### AMCEP ###########
  #############################
  if (type == "amcep") {
    
    data.prep <- data.prep[, -match("subgroup", colnames(data.prep))]
    
    
    ## create regression data set
    
    # Under conditionally independent randomization, each AMCEP has to
    # be computed based on estimates from a separate model. In order to obtain a
    # variance-covariance matrix, however, all models should be estimated at the same
    # time. To do this, a data set with the relevant variables and an intercept is
    # created for each model. Then, all of these data sets are used as blocks in a
    # block-diagonal matrix, on which an OLS regression can be run. This matrix is of
    # dimension (M*N) * (2 + M + sum(v_m)), where M is the number of separate models
    # to be estimated, N the number of pairs of profiles compared during the experiment,
    # and v_m the number of variables to be estimated in model m. The term 2 represents
    # the outcome and the cluster id variables.
    
    # empty data set
    N.reg_constrained <- 0
    if (length(constrained_vars)) {
      for (var in constrained_vars) {
        N.reg_constrained <- N.reg_constrained + (nlevels(data.long[[var]]) - 1)
      }
    }
    N.subdata <- (length(unconstrained_vars) > 0) * 1 + N.reg_constrained
    nb.col <- 2 + N.subdata + N.subdata * (ncol(data.prep) - 2)
    data.model <- data.frame(matrix(ncol = nb.col, nrow = 0))
    
    # column names (block-diagonal matrix)
    # c_ = nuisance variable (included to increase precision)
    # ..u = unconstrained variables
    # k = attribute id
    # m = level id
    base.names <- names(data.prep)[-c(1, ncol(data.prep))]
    if (length(unconstrained_vars) > 0 & length(constrained_vars) == 0) {
      namesvar.data.model <- paste0(base.names, "..")
    } else {
      if (length(unconstrained_vars)) {
        base.names.temp <- sapply(base.names, function(x) {
          if (str_extract(x, ".+?(?=\\.)") %in% constrained_vars) {
            return(paste0("c_", x, "..u"))
          } else {
            return(paste0(x, "..u"))
          }
        })
        namesvar.data.model <- base.names.temp
      }
      for (k in 1:length(constrained_vars)) {
        var <- constrained_vars[k]
        for (m in 2:nlevels(data.long[[var]])) {
          base.names.temp <- sapply(base.names, function(x) {
            if (str_extract(paste0(x, "."), ".+?(?=\\.)") != var) {
              return(paste0("c_", x, "..", k, ".", m))
            } else {
              return(paste0(x, "..", k, ".", m))
            }
          })
          namesvar.data.model <- c(namesvar.data.model, base.names.temp)
        }
      }
    }
    names(data.model) <- c("outcome", "clust", paste0("intercept.", seq(1:N.subdata)),
                           namesvar.data.model)
    n.subdata <- 1
    
    # unconstrained variables block
    # fill the columns that are outside the block with zeros
    if (length(unconstrained_vars)) {
      
      data.model.temp <- data.frame(outcome = data.prep[["outcome"]],
                                    clust = data.prep[["clust"]])
      
      # intercepts
      for (sub in 1:N.subdata) {
        data.model.temp$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
        names(data.model.temp)[2+sub] <- paste0("intercept.", sub)
      }
      
      # variables
      data.model.temp <- cbind(data.model.temp, data.prep[, -c(1,ncol(data.prep))])
      zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp),
                     nrow = nrow(data.model.temp))
      
      # merge intercepts and variables
      data.model.temp <- cbind(data.model.temp, as.data.frame(zero))
      colnames(data.model.temp) <- colnames(data.model)
      
      # fill the empty data set
      data.model <- rbind(data.model, data.model.temp)
      
      n.subdata <- 2
      
    }
    
    # constrained variables block
    if (length(constrained_vars)) {
      
      # loop over attributes
      for (k in 1:length(constrained_vars)) {
        
        var <- constrained_vars[k]
        pair.nb <- match(var, str_extract(colnames(constrained_terms), var))
        pair.vars <- all.vars(as.formula(paste0("~", colnames(constrained_terms)[pair.nb])))
        var1.nb <- ifelse(constraints.levels[[1]][[pair.nb]] == var, 3, 4)
        var2.nb <- ifelse(constraints.levels[[1]][[pair.nb]] == var, 4, 3)
        var1.levels <- as.character(1:nlevels(as.factor(data.long[[var]])))
        variableX.1 <- paste0(var, ".1")
        variableX.2 <- paste0(var, ".2")
        variableO.1 <- paste0(pair.vars[pair.vars != var], ".1")
        variableO.2 <- paste0(pair.vars[pair.vars != var], ".2")
        
        # loop over focal attribute's levels
        for (cat.var1 in var1.levels[-1]) {
          
          # base data
          data.model.temp <- data.frame(outcome = data.prep[["outcome"]],
                                        clust = data.prep[["clust"]])
          
          # intercepts
          for (sub in 1:N.subdata) {
            data.model.temp$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
            names(data.model.temp)[2+sub] <- paste0("intercept.", sub)
          }
          
          # variables
          zero <- matrix(0, ncol = (n.subdata - 1) *  (ncol(data.prep) - 2),
                         nrow = nrow(data.model.temp))
          data.model.temp <- cbind(data.model.temp, as.data.frame(zero))
          data.model.temp <- cbind(data.model.temp, data.prep[,-c(1,ncol(data.prep))])
          zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp),
                         nrow = nrow(data.model.temp))
          data.model.temp <- cbind(data.model.temp, as.data.frame(zero))
          colnames(data.model.temp) <- colnames(data.model)
          
          # select relevant observations
          data.model.temp <- cbind(data.model.temp,
                                   data.wide[, which(str_extract(colnames(data.wide),
                                                                 ".+?(?=\\.)") %in% pair.vars)])
          
            # flag empty counterfactuals
            empt.countf <- ifelse(
              # first type of counterfactual
              (((data.model.temp[[variableX.1]] == cat.var1 & data.model.temp[[variableX.2]] == var1.levels[1]) &
                  ((cat.var1 %in% constraints.levels[[var1.nb]][[pair.nb]] &
                      data.model.temp[[variableO.2]] %in% constraints.levels[[var2.nb]][[pair.nb]]) |
                     var1.levels[1] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                     data.model.temp[[variableO.1]] %in% constraints.levels[[var2.nb]][[pair.nb]])) |
                 ((data.model.temp[[variableX.1]] == var1.levels[1] & data.model.temp[[variableX.2]] == cat.var1) &
                    ((var1.levels[1] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                        data.model.temp[[variableO.2]] %in% constraints.levels[[var2.nb]][[pair.nb]]) |
                       cat.var1 %in% constraints.levels[[var1.nb]][[pair.nb]] &
                       data.model.temp[[variableO.1]] %in% constraints.levels[[var2.nb]][[pair.nb]]))) |
                # second type of counterfactual
                (((data.model.temp[[variableX.1]] == cat.var1 & data.model.temp[[variableX.2]] != var1.levels[1]) &
                    (var1.levels[1] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                       data.model.temp[[variableO.1]] %in% constraints.levels[[var2.nb]][[pair.nb]])) |
                   ((data.model.temp[[variableX.1]] == var1.levels[1] & data.model.temp[[variableX.2]] != cat.var1) &
                      (cat.var1 %in% constraints.levels[[var1.nb]][[pair.nb]] &
                         data.model.temp[[variableO.1]] %in% constraints.levels[[var2.nb]][[pair.nb]])) |
                   ((data.model.temp[[variableX.1]] != var1.levels[1] & data.model.temp[[variableX.2]] == cat.var1) &
                      (var1.levels[1] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                         data.model.temp[[variableO.2]] %in% constraints.levels[[var2.nb]][[pair.nb]])) |
                   ((data.model.temp[[variableX.1]] != cat.var1 & data.model.temp[[variableX.2]] == var1.levels[1]) &
                      (cat.var1 %in% constraints.levels[[var1.nb]][[pair.nb]] &
                         data.model.temp[[variableO.2]] %in% constraints.levels[[var2.nb]][[pair.nb]])))
              , 1, 0)
            
            # flag relevant observations
            select <- ifelse((data.model.temp[[variableX.1]] %in% c(var1.levels[1], cat.var1) |
                                data.model.temp[[variableX.2]] %in% c(var1.levels[1], cat.var1)) &
                               data.model.temp[[variableX.1]] != data.model.temp[[variableX.2]] &
                               empt.countf == 0, 1, 0)
            
            # remove irrelevant observations and variables
            data.model.temp <- data.model.temp[select == 1,
                                               -match(c(variableX.1, variableX.2,
                                                        variableO.1, variableO.2),
                                                      colnames(data.model.temp))]
          
          # append to block-diagonal matrix
          data.model <- rbind(data.model, data.model.temp)
          
          n.subdata <- n.subdata + 1
          
        }
      }
    }
    
    # delete controls (if applicable) and columns with zeros only
    if (adjust == FALSE & length(constrained_vars) > 0) {
      data.model <- data.model[, -which(substr(colnames(data.model), 1, 2) == "c_")]
    }
    data.model <- data.model[, apply(data.model, 2, function(x) !all(x == 0))]
    
    
    ## estimation
    
    # estimate model on big block-diagonal matrix
    model <- lm(outcome ~ 0 + ., data = data.model[, -match("clust", names(data.model))])
    
    # export coefficients
    coef <- coef(model)[-c(1:N.subdata)]
    coef.df <- data.frame(name = gsub("`", '', names(coef)),
                          coef,
                          variable = NA,
                          level1 = NA,
                          level2 = NA,
                          var.amcep = NA,
                          level.amcep = NA)
    coef.df <- coef.df[substr(coef.df$name, 1, 2) != "c_",]
    coef.df$variable <- str_extract(coef.df$name, ".+?(?=\\.)")
    coef.df$level1 <- str_extract(coef.df$name, "(?<=\\.).+?(?=\\-)")
    coef.df$level2 <- str_extract(coef.df$name, "(?<=\\-).+?(?=\\.\\.)")
    coef.df$var.amcep <- str_extract(coef.df$name, "(?<=\\.\\.).+?(?=\\.)")
    coef.df$var.amcep <- ifelse(is.na(coef.df$var.amcep), 0, coef.df$var.amcep)
    coef.df$level.amcep <- str_extract(str_extract(coef.df$name, "(?<=\\.\\.).+"), "(?<=\\.).+")
    coef.df$level.amcep <- ifelse(is.na(coef.df$level.amcep), 0, coef.df$level.amcep)
    
    # clustered variance-covariance matrix
    # when respondents are given several tasks, we only need to cluster at the
    # respondent level; the task level can be ignored since it is nested in the
    # respondent level
    N <- nrow(data.model)           
    r <- model$rank
    N.id <- length(unique(data.model$clust))
    dfc <- (N.id/(N.id - 1)) * ((N - 1)/(N - r))
    uj  <- apply(estfun(model), 2, function(x) tapply(x, as.character(data.model$clust), sum))
    vcov <- dfc * sandwich(model, meat = crossprod(uj)/N)
    vcov <- vcov[-c(1:N.subdata), -c(1:N.subdata)]
    
    # remove controls' coefficients
    if (adjust == TRUE) {
      id.controls <- which(substr(gsub("`", '', colnames(vcov)), 1, 2) == "c_")
      coef <- coef[-id.controls]
      vcov <- vcov[-id.controls,-id.controls]
    }
    
    
    ## amcep weights
    
    # AMCEPs are linear combinations of the coefficients previously estimated. This
    # section constructs a matrix of weights, in which each column is the vector of
    # coefficients of one AMCEP. AMCEPs will eventually be obtained by multiplying the
    # matrix of coefficients and the vector of OLS estimates. The variance-covariance
    # matrix is calculated by multiplying the matrix of coefficients and the
    # variance-covariance matrix of the OLS estimates.
    
    calc.wt <- NULL
    
    # loop over attributes
    for (var in var.x) {
      
      # continuous attribute
      if (class(data.long[[var]]) == "numeric") {
        calc.wt.temp <- ifelse(coef.df[["variable"]] == var, 1, 0)
        calc.wt <- as.data.frame(cbind(calc.wt, calc.wt.temp))
        colnames(calc.wt)[match("calc.wt.temp", names(calc.wt))] <- var
        
      # categorical attribute
      } else {
        
        var.nb <- ifelse(is.na(match(var, constrained_vars)), 0, match(var, constrained_vars))
        var1.levels <- as.character(1:nlevels(as.factor(data.long[[var]])))
        
        # loop over attribute's levels
        for (cat.var1 in var1.levels[-1]) {
          
          # determine sign of reference level component
          ref.wt.temp <- ifelse(coef.df[["variable"]] == var &
                                  coef.df[["level1"]] == var1.levels[1] &
                                  coef.df[["var.amcep"]] == var.nb &
                                  (coef.df[["level.amcep"]] == cat.var1 |
                                     var.nb == 0), 1,
                                ifelse(coef.df[["variable"]] == var &
                                         coef.df[["level2"]] == var1.levels[1] &
                                         coef.df[["var.amcep"]] == var.nb &
                                         (coef.df[["level.amcep"]] == cat.var1 |
                                            var.nb == 0), -1, 0))
          
          # determine sign of focal level component
          foc.wt.temp <- ifelse(coef.df[["variable"]] == var &
                                  coef.df[["level1"]] == cat.var1 &
                                  coef.df[["var.amcep"]] == var.nb &
                                  (coef.df[["level.amcep"]] == cat.var1 |
                                     var.nb == 0), 1,
                                ifelse(coef.df[["variable"]] == var &
                                         coef.df[["level2"]] == cat.var1 &
                                         coef.df[["var.amcep"]] == var.nb &
                                         (coef.df[["level.amcep"]] == cat.var1 |
                                            var.nb == 0), -1, 0))
          
          # calculate total weight
          calc.wt.temp <- 1/(length(var1.levels) - 1) * (foc.wt.temp - ref.wt.temp)
          calc.wt <- as.data.frame(cbind(calc.wt, calc.wt.temp))
          colnames(calc.wt)[match("calc.wt.temp", names(calc.wt))] <- paste(var, cat.var1, sep = ".")
          
        }
      }
      
    }
    
    
    ## amcep calculation
    
    # coefficients names
    # replace labels' aliases by labels
    amcep.names <- NULL
    for (var in var.x) {
      if (class(data.long[[var]]) != "factor") {
        amcep.names <- c(amcep.names, var)
      } else {
        for (cat in 2:length(var.levels[[match(var, var.x)]])) {
          amcep.names <- c(amcep.names, paste(var, var.levels[[match(var, var.x)]][cat],
                                              sep = "."))
        }
      }
    }
    
    # convert the matrix of coefficients (a data frame) into a matrix object
    calc.wt <- as.matrix(calc.wt)
    
    # point estimates
    amcep.coef <- t(calc.wt) %*% coef
    amcep.coef <- as.vector(amcep.coef)
    names(amcep.coef) <- amcep.names
    
    # variance covariance matrix
    amcep.vcov <- t(calc.wt) %*% vcov %*% calc.wt
    colnames(amcep.vcov) <- amcep.names
    rownames(amcep.vcov) <- amcep.names
    
    # output
    if (!is.null(constraints.levels)) {
      for (res in 1:length(constraints.levels[[1]])) {
        res.1 <- constraints.levels[[1]][res]
        res.2 <- constraints.levels[[2]][res]
        constraints.levels[[3]][[res]] <- var.levels[[match(res.1, var.x)]][as.numeric(constraints.levels[[3]][[res]])]
        constraints.levels[[4]][[res]] <- var.levels[[match(res.2, var.x)]][as.numeric(constraints.levels[[4]][[res]])]
      }
    }
    out <- list(estimate = amcep.coef,
                vcov = amcep.vcov,
                estimand = "amcep",
                subgroups = NULL,
                n.profiles = nrow(data.wide) * 2,
                n.tasks = nrow(data.wide),
                n.respondents = length(unique(data.model$clust)),
                constraints = constraints.levels)
    
    
    
    ############################
    ########### DCSP ###########
    ############################
  } else if (type == "dcsp") {
    
    
    ## create regression data set
    
    # Under conditionally independent randomization, each attribute's DCSPs have to
    # be computed based on estimates from two separate models---one for each subgroup.
    # In order to obtain a variance-covariance matrix, however, all models should be
    # estimated at the same time. To do this, a data set with the relevant variables
    # and an intercept is created for each model. Then, all of these data sets are
    # used as blocks in a block-diagonal matrix, on which an OLS regression can be
    # run. This matrix is of dimension (M*N) * (2 + M + sum(v_m)), where M is the
    # number of separate models to be estimated, N the number of pairs of profiles
    # compared during the experiment, and v_m the number of variables to be estimated
    # in model m. The term 2 represents the outcome and the cluster id variables.
    
    # empty data set
    N.subdata <- 2 * ((length(unconstrained_vars) > 0) * 1 + length(constrained_vars))
    nb.col <- 2 + N.subdata + N.subdata * (ncol(data.prep) - 3)
    data.model <- data.frame(matrix(ncol = nb.col, nrow = 0))

    # column names (block-diagonal matrix)
    # c_ = nuisance variable (included to increase precision)
    # ..u = unconstrained variables
    # k = attribute id
    base.names <- names(data.prep)[-c(1, ncol(data.prep)-1, ncol(data.prep))]
    if (length(unconstrained_vars) > 0 & length(constrained_vars) == 0) {
      namesvar.data.model <- paste0(base.names, "..")
    } else {
      if (length(unconstrained_vars)) {
        base.names.temp <- sapply(base.names, function(x) {
          if (str_extract(x, ".+?(?=\\.)") %in% constrained_vars) {
            return(paste0("c_", x, "..u"))
          } else {
            return(x)
          }
        })
        namesvar.data.model <- c(paste0(base.names.temp, "_1"), paste0(base.names.temp, "_2"))
      }
      for (k in 1:length(constrained_vars)) {
        var <- constrained_vars[k]
        base.names.temp <- sapply(base.names, function(x) {
          if (str_extract(paste0(x, "."), ".+?(?=\\.)") != var) {
            return(paste0("c_", x, "..", k))
          } else {
            return(x)
          }
        })
        base.names.temp_1 <- paste0(base.names.temp, "_1")
        base.names.temp_2 <- paste0(base.names.temp, "_2")
        namesvar.data.model <- c(namesvar.data.model, base.names.temp_1, base.names.temp_2)
      }
    }
    names(data.model) <- c("outcome", "clust", paste0("intercept.", seq(1:N.subdata)), namesvar.data.model)
    n.subdata <- 1
    
    # unconstrained variables block
    # fill the columns that are outside the block with zeros
    if (length(unconstrained_vars)) {
      
      # subgroup 1
      data.model.temp_1 <- data.frame(outcome = data.prep[["outcome"]],
                                      clust = data.prep[["clust"]])
      
        # intercepts
        for (sub in 1:N.subdata) {
          data.model.temp_1$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
          names(data.model.temp_1)[2+sub] <- paste0("intercept.", sub)
        }
      
        # variables
        data.model.temp_1 <- cbind(data.model.temp_1, data.prep[,-c(1,ncol(data.prep)-1,
                                                                    ncol(data.prep))])
        zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp_1),
                       nrow = nrow(data.model.temp_1))
        data.model.temp_1 <- cbind(data.model.temp_1, as.data.frame(zero))
        colnames(data.model.temp_1) <- colnames(data.model)
        
        # fill the empty data set
        data.model.temp_1 <- data.model.temp_1[data.prep$subgroup == "1",]
        data.model <- rbind(data.model, data.model.temp_1)
      
      n.subdata <- 2
      
      # subgroup 2
      data.model.temp_2 <- data.frame(outcome = data.prep[["outcome"]],
                                      clust = data.prep[["clust"]])
      
        # intercepts
        for (sub in 1:N.subdata) {
          data.model.temp_2$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
          names(data.model.temp_2)[2+sub] <- paste0("intercept.", sub)
        }
      
        # variables
        zero <- matrix(0, ncol = (n.subdata - 1) *  (ncol(data.prep) - 3), nrow = nrow(data.model.temp_2))
        data.model.temp_2 <- cbind(data.model.temp_2, as.data.frame(zero))
        data.model.temp_2 <- cbind(data.model.temp_2, data.prep[,-c(1,ncol(data.prep)-1,ncol(data.prep))])
        zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp_2), nrow = nrow(data.model.temp_2))
        data.model.temp_2 <- cbind(data.model.temp_2, as.data.frame(zero))
        colnames(data.model.temp_2) <- colnames(data.model)
       
        # append to block-diagonal matrix
        data.model.temp_2 <- data.model.temp_2[data.prep$subgroup == "2",]
        data.model <- rbind(data.model, data.model.temp_2)
      
      n.subdata <- 3
    }
    
    # constrained variables block
    # fill the columns that are outside of the block with zeros
    if (length(constrained_vars)) {
      
      # loop over attributes
      for (k in 1:length(constrained_vars)) {
        
        var <- constrained_vars[k]
        pair.nb <- match(var, str_extract(colnames(constrained_terms), var))
        pair.vars <- all.vars(as.formula(paste0("~", colnames(constrained_terms)[pair.nb])))
        var1.nb <- ifelse(constraints.levels[[1]][[pair.nb]] == var, 3, 4)
        var2.nb <- ifelse(constraints.levels[[1]][[pair.nb]] == var, 4, 3)
        var1.levels <- as.character(1:nlevels(as.factor(data.long[[var]])))
        variableX.1 <- paste0(var, ".1")
        variableX.2 <- paste0(var, ".2")
        variableO.1 <- paste0(pair.vars[pair.vars != var], ".1")
        variableO.2 <- paste0(pair.vars[pair.vars != var], ".2")
        
        # subgroup 1
        
          # base data
          data.model.temp_1 <- data.frame(outcome = data.prep[["outcome"]],
                                          clust = data.prep[["clust"]])
          
            # intercepts
            for (sub in 1:N.subdata) {
              data.model.temp_1$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
              names(data.model.temp_1)[2+sub] <- paste0("intercept.", sub)
            }
            
            # variables
            zero <- matrix(0, ncol = (n.subdata - 1) * (ncol(data.prep) - 3), nrow = nrow(data.model.temp_1))
            data.model.temp_1 <- cbind(data.model.temp_1, as.data.frame(zero))
            data.model.temp_1 <- cbind(data.model.temp_1, data.prep[,-c(1,ncol(data.prep)-1,ncol(data.prep))])
            zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp_1), nrow = nrow(data.model.temp_1))
            data.model.temp_1 <- cbind(data.model.temp_1, as.data.frame(zero))
            colnames(data.model.temp_1) <- colnames(data.model)
            
            # select relevant observations
            data.model.temp_1 <- data.model.temp_1[data.prep$subgroup == "1",]
            data.model.temp_1 <- cbind(data.model.temp_1,
                                       data.wide[data.prep$subgroup == "1",
                                                 which(str_extract(colnames(data.wide), ".+?(?=\\.)") %in% pair.vars)])
            
            # flag empty counterfactuals
            empt.countf <- ifelse(
              (data.model.temp_1[[variableX.1]] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                 data.model.temp_1[[variableO.2]] %in% constraints.levels[[var2.nb]][[pair.nb]]) |
                (data.model.temp_1[[variableX.2]] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                   data.model.temp_1[[variableO.1]] %in% constraints.levels[[var2.nb]][[pair.nb]])
              , 1, 0)
            
            # remove irrelevant observations and variables
            data.model.temp_1 <- data.model.temp_1[empt.countf == 0,
                                                   -match(c(variableX.1, variableX.2, variableO.1, variableO.2),
                                                          colnames(data.model.temp_1))]
            
            # append to block-diagonal matrix
            data.model <- rbind(data.model, data.model.temp_1)
            
          n.subdata <- n.subdata + 1
        
        # subgroup 2
        
            # base data
            data.model.temp_2 <- data.frame(outcome = data.prep[["outcome"]],
                                            clust = data.prep[["clust"]])
            
            # intercepts
            for (sub in 1:N.subdata) {
              data.model.temp_2$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
              names(data.model.temp_2)[2+sub] <- paste0("intercept.", sub)
            }
            
            # variables
            zero <- matrix(0, ncol = (n.subdata - 1) *  (ncol(data.prep) - 3), nrow = nrow(data.model.temp_2))
            data.model.temp_2 <- cbind(data.model.temp_2, as.data.frame(zero))
            data.model.temp_2 <- cbind(data.model.temp_2, data.prep[,-c(1,ncol(data.prep)-1,ncol(data.prep))])
            zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp_2), nrow = nrow(data.model.temp_2))
            data.model.temp_2 <- cbind(data.model.temp_2, as.data.frame(zero))
            colnames(data.model.temp_2) <- colnames(data.model)
            
            # select relevant observations
            data.model.temp_2 <- data.model.temp_2[data.prep$subgroup == "2",]
            data.model.temp_2 <- cbind(data.model.temp_2,
                                       data.wide[data.prep$subgroup == "2",
                                                 which(str_extract(colnames(data.wide), ".+?(?=\\.)") %in% pair.vars)])
            
            # flag empty counterfactuals
            empt.countf <- ifelse(
              (data.model.temp_2[[variableX.1]] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                 data.model.temp_2[[variableO.2]] %in% constraints.levels[[var2.nb]][[pair.nb]]) |
                (data.model.temp_2[[variableX.2]] %in% constraints.levels[[var1.nb]][[pair.nb]] &
                   data.model.temp_2[[variableO.1]] %in% constraints.levels[[var2.nb]][[pair.nb]])
              , 1, 0)
            
            # remove irrelevant observations and variables
            data.model.temp_2 <- data.model.temp_2[empt.countf == 0,
                                                   -match(c(variableX.1, variableX.2, variableO.1, variableO.2),
                                                          colnames(data.model.temp_2))]
            
            # append to block-diagonal matrix
            data.model <- rbind(data.model, data.model.temp_2)
            
          n.subdata <- n.subdata + 1
        
      }
    }
    
    # delete controls (if applicable) and columns with zeros only
    if (adjust == FALSE & length(constrained_vars) > 0) {
      data.model <- data.model[, -which(substr(colnames(data.model), 1, 2) == "c_")]
    }
    data.model <- data.model[, apply(data.model, 2, function(x) !all(x == 0))]
    
    
    ## estimation
    
    # estimate the model on the big block-diagonal matrix
    model <- lm(outcome ~ 0 + ., data = data.model[, -match("clust", names(data.model))])
    
    # export coefficients
    coef <- coef(model)[-c(1:N.subdata)]
    coef.df <- data.frame(name = gsub("`", '', names(coef)),
                          coef,
                          variable = NA,
                          level1 = NA,
                          level2 = NA,
                          subgroup = NA)
    coef.df <- coef.df[substr(coef.df$name, 1, 2) != "c_",]
    coef.df$variable <- str_extract(coef.df$name, ".+?(?=\\.)")
    coef.df$variable <- ifelse(is.na(coef.df$variable),
                               str_extract(coef.df$name, ".+?(?=\\_)"),
                               coef.df$variable)
    coef.df$level1 <- str_extract(coef.df$name, "(?<=\\.).+?(?=\\-)")
    coef.df$level2 <- str_extract(coef.df$name, "(?<=\\-).+?(?=\\_)")
    coef.df$subgroup <- str_extract(coef.df$name, "(?<=\\_).+")
    
    # clustered variance-covariance matrix
    # when respondents are given several tasks, we only need to cluster at the
    # respondent level; the task level can be ignored since it is nested in the
    # respondent level
    N <- nrow(data.model)           
    r <- model$rank
    N.id <- length(unique(data.model$clust))
    dfc <- (N.id/(N.id - 1)) * ((N - 1)/(N - r))
    uj  <- apply(estfun(model), 2, function(x) tapply(x, as.character(data.model$clust), sum))
    vcov <- dfc * sandwich(model, meat = crossprod(uj)/N)
    vcov <- vcov[-c(1:N.subdata), -c(1:N.subdata)]
    
    # remove controls' coefficients
    if (adjust == TRUE) {
      id.controls <- which(substr(gsub("`", '', colnames(vcov)), 1, 2) == "c_")
      coef <- coef[-id.controls]
      vcov <- vcov[-id.controls,-id.controls]
    }
    
    
    ## dcsp weights
    
    # DCSPs are linear combinations of the coefficients previously estimated. This
    # section constructs a matrix of weights, in which each column is the vector of
    # coefficients of one DCSP DCSPs will eventually be obtained by multiplying the
    # matrix of coefficients and the vector of OLS estimates. The variance-covariance
    # matrix is calculated by multiplying the matrix of coefficients and the
    # variance-covariance matrix of the OLS estimates.
    
    calc.wt <- NULL
    
    # loop over attributes
    for (var in var.x) {
      
      # continuous variable
      if (class(data.long[[var]]) == "numeric") {
        calc.wt.temp <- ifelse(coef.df[["variable"]] == var & coef.df[["subgroup"]] == "1", 1, 0) -
          ifelse(coef.df[["variable"]] == var & coef.df[["subgroup"]] == "2", 1, 0)
        calc.wt <- as.data.frame(cbind(calc.wt, calc.wt.temp))
        colnames(calc.wt)[match("calc.wt.temp", names(calc.wt))] <- var
        
      # categorical variable
      } else {
        
        var.nb <- ifelse(is.na(match(var, constrained_vars)), 0, match(var, constrained_vars))
        var1.levels <- as.character(1:nlevels(as.factor(data.long[[var]])))
        
        # loop over attribute's levels
        for (cat.var1 in var1.levels) {
          
          # determine sign of subgroup 1 profiles
          grp1.wt.temp <- ifelse(coef.df[["variable"]] == var &
                                   coef.df[["level1"]] == cat.var1 &
                                   coef.df[["subgroup"]] == "1", 1,
                                 ifelse(coef.df[["variable"]] == var &
                                          coef.df[["level2"]] == cat.var1 &
                                          coef.df[["subgroup"]] == "1", -1, 0))
          
          # determine sign of subgroup 2 profiles
          grp2.wt.temp <- ifelse(coef.df[["variable"]] == var &
                                   coef.df[["level1"]] == cat.var1 &
                                   coef.df[["subgroup"]] == "2", 1,
                                 ifelse(coef.df[["variable"]] == var &
                                          coef.df[["level2"]] == cat.var1 &
                                          coef.df[["subgroup"]] == "2", -1, 0))
          
          # total weight
          calc.wt.temp <- 1/(length(var1.levels) - 1) * (grp1.wt.temp - grp2.wt.temp)
          calc.wt <- as.data.frame(cbind(calc.wt, calc.wt.temp))
          colnames(calc.wt)[match("calc.wt.temp", names(calc.wt))] <- paste(var, cat.var1, sep = ".")
          
        }
      }
      
    }
    
    
    ## dcsp calculation
    
    # coefficient names
    # replace labels' aliases by labels
    dcsp.names <- NULL
    for (var in var.x) {
      if (class(data.long[[var]]) != "factor") {
        dcsp.names <- c(dcsp.names, var)
      } else {
        for (cat in 1:length(var.levels[[match(var, var.x)]])) {
          dcsp.names <- c(dcsp.names, paste(var, var.levels[[match(var, var.x)]][cat],
                                            sep = "."))
        }
      }
    }
    
    # convert the matrix of coefficients (a data frame) into a matrix object
    calc.wt <- as.matrix(calc.wt)
    
    # point estimates
    dcsp.coef <- t(calc.wt) %*% coef
    dcsp.coef <- as.vector(dcsp.coef)
    names(dcsp.coef) <- dcsp.names
    
    # variance covariance matrix
    dcsp.vcov <- t(calc.wt) %*% vcov %*% calc.wt
    colnames(dcsp.vcov) <- dcsp.names
    rownames(dcsp.vcov) <- dcsp.names
    
    # output
    if (!is.null(constraints.levels)) {
      for (res in 1:length(constraints.levels[[1]])) {
        res.1 <- constraints.levels[[1]][res]
        res.2 <- constraints.levels[[2]][res]
        constraints.levels[[3]][[res]] <- var.levels[[match(res.1, var.x)]][as.numeric(constraints.levels[[3]][[res]])]
        constraints.levels[[4]][[res]] <- var.levels[[match(res.2, var.x)]][as.numeric(constraints.levels[[4]][[res]])]
      }
    }
    out <- list(estimate = dcsp.coef,
                vcov = dcsp.vcov,
                estimand = "dcsp",
                subgroups = list(var = by,
                                 ref = subgroup.levels[1],
                                 oth = subgroup.levels[2]),
                n.profiles = nrow(data.wide) * 2,
                n.tasks = nrow(data.wide),
                n.respondents = length(unique(data.model$clust)),
                constraints = constraints.levels)
    
    
    
    ############################
    ############ P #############
    ############################
  } else if (type == "p") {
    
    data.prep <- data.prep[, -match("subgroup", colnames(data.prep))]
    
    
    ## create regression data set
    
    # Under conditionally independent randomization, each attribute's pairwise 
    # probability has to be computed based on estimates from a separate model. In
    # order to obtain a variance-covariance matrix, however, all models should be
    # estimated at the same time. To do this, a data set with the relevant variables
    # and an intercept is created for each model. Then, all of these data sets are
    # used as blocks in a block-diagonal matrix, on which an OLS regression can be
    # run. This matrix is ofdimension (M*N) * (2 + M + sum(v_m)), where M is the
    # number of separate models to be estimated, N the number of pairs of profiles
    # compared during the experiment, and v_m the number of variables to be estimated
    # in model m. The term 2 represents the outcome and the cluster id variables.
    
    # empty data set
    N.subdata <- (length(unconstrained_vars) > 0) * 1 + length(constrained_vars)
    nb.col <- 2 + N.subdata + N.subdata * (ncol(data.prep) - 2)
    data.model <- data.frame(matrix(ncol = nb.col, nrow = 0))
    
    # column names (block-diagonal matrix)
    # c_ = nuisance variable (included to increase precision)
    # ..u = unconstrained variables
    # k = attribute id
    # m = level id
    base.names <- names(data.prep)[-c(1, ncol(data.prep))]
    if (length(unconstrained_vars) > 0 & length(constrained_vars) == 0) {
      namesvar.data.model <- paste0(base.names, "..")
    } else {
      if (length(unconstrained_vars)) {
        base.names.temp <- sapply(base.names, function(x) {
          if (str_extract(x, ".+?(?=\\.)") %in% constrained_vars) {
            return(paste0("c_", x, "..u"))
          } else {
            return(paste0(x, "..u"))
          }
        })
        namesvar.data.model <- base.names.temp
      }
      for (k in 1:length(constrained_vars)) {
        var <- constrained_vars[k]
        base.names.temp <- sapply(base.names, function(x) {
          if (str_extract(paste0(x, "."), ".+?(?=\\.)") != var) {
            return(paste0("c_", x, "..", k))
          } else {
            return(x)
          }
        })
        namesvar.data.model <- c(namesvar.data.model, base.names.temp)
      }
    }
    names(data.model) <- c("outcome", "clust", paste0("intercept.", seq(1:N.subdata)), namesvar.data.model)
    n.subdata <- 1
    
    # unconstrained variables block
    # fill the columns that are outside the block with zeros
    if (length(unconstrained_vars)) {
      
      data.model.temp <- data.frame(outcome = data.prep[["outcome"]],
                                    clust = data.prep[["clust"]])
      
      # intercept
      for (sub in 1:N.subdata) {
        data.model.temp$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
        names(data.model.temp)[2+sub] <- paste0("intercept.", sub)
      }
      
      # variables
      data.model.temp <- cbind(data.model.temp, data.prep[,-c(1,ncol(data.prep))])
      zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp), nrow = nrow(data.model.temp))
      
      # merge intercept and variables
      data.model.temp <- cbind(data.model.temp, as.data.frame(zero))
      colnames(data.model.temp) <- colnames(data.model)
      
      # fill the empty data set
      data.model <- rbind(data.model, data.model.temp)
      
      n.subdata <- 2
      
    }
    
    # constrained variables block
    # fill the columns that are outside the block with zeros
    if (length(constrained_vars)) {
      
      # loop over attributes
      for (k in 1:length(constrained_vars)) {
        
        var <- constrained_vars[k]
        pair.nb <- match(var, str_extract(colnames(constrained_terms), var))
        pair.vars <- all.vars(as.formula(paste0("~", colnames(constrained_terms)[pair.nb])))
        var1.nb <- ifelse(constraints.levels[[1]][[pair.nb]] == var, 3, 4)
        var2.nb <- ifelse(constraints.levels[[1]][[pair.nb]] == var, 4, 3)
        var1.levels <- as.character(1:nlevels(as.factor(data.long[[var]])))
        variableX.1 <- paste0(var, ".1")
        variableX.2 <- paste0(var, ".2")
        variableO.1 <- paste0(pair.vars[pair.vars != var], ".1")
        variableO.2 <- paste0(pair.vars[pair.vars != var], ".2")
        
        # base data
        data.model.temp <- data.frame(outcome = data.prep[["outcome"]],
                                      clust = data.prep[["clust"]])
        
        # intercepts
        for (sub in 1:N.subdata) {
          data.model.temp$intercept.temp <- ifelse(n.subdata == sub, 1, 0)
          names(data.model.temp)[2+sub] <- paste0("intercept.", sub)
        }
        
        # variables
        zero <- matrix(0, ncol = (n.subdata - 1) * (ncol(data.prep) - 2), nrow = nrow(data.model.temp))
        data.model.temp <- cbind(data.model.temp, as.data.frame(zero))
        data.model.temp <- cbind(data.model.temp, data.prep[,-c(1,ncol(data.prep))])
        zero <- matrix(0, ncol = ncol(data.model) - ncol(data.model.temp), nrow = nrow(data.model.temp))
        data.model.temp <- cbind(data.model.temp, as.data.frame(zero))
        colnames(data.model.temp) <- colnames(data.model)
        
        # select relevant observations
        data.model.temp <- cbind(data.model.temp,
                                 data.wide[,which(str_extract(colnames(data.wide), ".+?(?=\\.)") %in% pair.vars)])
        
        # flag empty counterfactuals
        empt.countf <- ifelse(
          (data.model.temp[[variableX.1]] %in% constraints.levels[[var1.nb]][[pair.nb]] &
             data.model.temp[[variableO.2]] %in% constraints.levels[[var2.nb]][[pair.nb]]) |
            (data.model.temp[[variableX.2]] %in% constraints.levels[[var1.nb]][[pair.nb]] &
               data.model.temp[[variableO.1]] %in% constraints.levels[[var2.nb]][[pair.nb]])
          , 1, 0)
        
        # remove irrelevant observations and variables
        data.model.temp <- data.model.temp[empt.countf == 0,
                                           -match(c(variableX.1, variableX.2, variableO.1, variableO.2),
                                                  colnames(data.model.temp))]
        
        # append to block-diagonal matrix
        data.model <- rbind(data.model, data.model.temp)
        n.subdata <- n.subdata + 1
        
      }
    }
    
    # delete controls (if applicable) and columns with zeros only
    if (adjust == FALSE & length(constrained_vars) > 0) {
      data.model <- data.model[, -which(substr(colnames(data.model), 1, 2) == "c_")]
    }
    data.model <- data.model[, apply(data.model, 2, function(x) !all(x == 0))]
    
    
    ## estimation
    
    # estimate model
    model <- lm(outcome ~ 0 + ., data = data.model[, -match("clust", names(data.model))])
    coef <- coef(model)[-c(1:N.subdata)]
    
    # clustered variance-covariance matrix
    # when respondents are given several tasks, we only need to cluster at the
    # respondent level; the task level can be ignored since it is nested in the
    # respondent level (the demonstration is straightforward)
    N <- nrow(data.model)           
    r <- model$rank
    N.id <- length(unique(data.model$clust))
    dfc <- (N.id/(N.id - 1)) * ((N - 1)/(N - r))
    uj  <- apply(estfun(model), 2, function(x) tapply(x, as.character(data.model$clust), sum))
    vcov <- dfc * sandwich(model, meat = crossprod(uj)/N)
    vcov <- vcov[-c(1:N.subdata), -c(1:N.subdata)]
    
    # remove controls' coefficients
    if (adjust == TRUE) {
      id.controls <- which(substr(gsub("`", '', colnames(vcov)), 1, 2) == "c_")
      coef <- coef[-id.controls]
      vcov <- vcov[-id.controls,-id.controls]
    }
    
    
    ## pairwise selection probability calculation
    
    # coefficient names
    # replace labels' aliases by labels
    p.names <- NULL
    for (var in c(unconstrained_vars,constrained_vars)) {
      if (class(data.long[[var]]) != "factor") {
        p.names <- c(p.names, var)
      } else {
        for (cat1 in 1:length(var.levels[[match(var, var.x)]])) {
          for (cat2 in 1:length(var.levels[[match(var, var.x)]])) {
            if (cat1 < cat2) {
              p.names <- c(p.names, paste0(var, ".", var.levels[[match(var, var.x)]][cat1], "-",
                                           var.levels[[match(var, var.x)]][cat2]))
            }
          }
        }
      }
    }
    
    # point estimates
    p.coef <- coef
    names(p.coef) <- p.names
    
    # variance covariance matrix
    p.vcov <- vcov
    colnames(p.vcov) <- p.names
    rownames(p.vcov) <- p.names
    
    # output
    if (!is.null(constraints.levels)) {
      for (res in 1:length(constraints.levels[[1]])) {
        res.1 <- constraints.levels[[1]][res]
        res.2 <- constraints.levels[[2]][res]
        constraints.levels[[3]][[res]] <- var.levels[[match(res.1, var.x)]][as.numeric(constraints.levels[[3]][[res]])]
        constraints.levels[[4]][[res]] <- var.levels[[match(res.2, var.x)]][as.numeric(constraints.levels[[4]][[res]])]
      }
    }
    out <- list(estimate = p.coef,
                vcov = p.vcov,
                estimand = "p",
                subgroups = NULL,
                n.profiles = nrow(data.wide) * 2,
                n.tasks = nrow(data.wide),
                n.respondents = length(unique(data.model$clust)),
                constraints = constraints.levels)
    
    
  }
  
  ###########################
  ########### OUT ###########
  ###########################
  class(out) <- "amcep"
  return(out) 
  
}


#######################################
########### PRINT & SUMMARY ###########
#######################################

# print
print.amcep <- function(x, digits = max(3L, getOption("digits") - 3L))
{
  if(length(x$estimate)) {
    if (x$estimand == "amcep") {
      cat("\nAMCEPs:\n")
    } else if (x$estimand == "dcsp") {
      cat("\nDCSPs",
          paste0("(", x$subgroups$ref, " - ", x$subgroups$oth, "):\n"))
    } else {
      cat("\nPairwise Preferences (p):\n")
    }
    print.default(format(x$estimate, digits = digits),
                  print.gap = 2L, quote = FALSE)
  } else cat("No coefficients\n")
  cat("\n")
  invisible(x)
}

# summary
summary.amcep <- function(object)
{
  z <- object
  se <- sqrt(diag(z$vcov))
  est <- z$estimate
  zval <- est/se
  ans <- z[c("subgroups", "n.profiles", "n.tasks", "n.respondents",
             "estimand", "constraints")]
  ans$coefficients <-
    cbind(est, se, zval, 2*pnorm(abs(zval), lower.tail = FALSE))
  dimnames(ans$coefficients) <-
    list(names(z$estimate),
         c("Estimate", "Std. Error", "z value", "Pr(>|z|)"))
  class(ans) <- "summary.amcep"
  ans
}

# print.summary
print.summary.amcep <- function (x, digits = max(3L, getOption("digits") - 3L),
                                 signif.stars = getOption("show.signif.stars"))  {
  cat("\nNumber of profiles:", paste0(x$n.profiles))
  cat("\nNumber of tasks:", paste0(x$n.tasks))
  cat("\nNumber of respondents:", paste0(x$n.respondents))
  if (!is.null(x$constraints)) {
    cat("\nRestrictions:")
    if (length(x$constraints[[1]]) == 1) {
      cat("\n ", paste0("   [", x$constraints[[1]], "] ",
                        paste(unlist(x$constraints[[3]]), collapse = ", "),
                        "\n   & [", x$constraints[[2]], "] ",
                        paste(unlist(x$constraints[[4]]), collapse = ", ")))
    } else {
      for (i in 1:length(x$constraints[[1]])) {
        cat("\n ", paste0(i, ".   [", x$constraints[[1]][i], "] ",
                          paste(unlist(x$constraints[[3]][i]), collapse = ", "),
                          "\n     & [", x$constraints[[2]][i], "] ",
                          paste(unlist(x$constraints[[4]][i]), collapse = ", ")))
      }
    }
  }
  if (x$estimand == "dcsp") {
    cat("\nSubgroups:", paste0(x$subgroups$var))
  }
  cat("\n\n")
  if (x$estimand == "amcep") {
    cat("\nAMCEPs:\n")
  } else if (x$estimand == "dcsp") {
    cat("\nDCSPs",
        paste0("(", x$subgroups$ref, " - ", x$subgroups$oth, "):\n"))
  } else {
    cat("\nPairwise Preferences (p):\n")
  }
  coefs <- x$coefficients
  printCoefmat(coefs, digits, signif.stars = signif.stars,
               na.print = "NA")
  cat("\n")
  cat("\n")
  invisible(x)
}
